In [2]:
import glob
import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
from matplotlib import animation, rc
import matplotlib.colors as colors
from IPython.display import HTML
import seaborn as sns
from netCDF4 import Dataset
from sklearn.neighbors import KDTree, BallTree

The current assumption is that there is a need to discriminate between surface and sub-surface fires, as the coefficients need to be applied appropriately. Previous attempts at this discrimination focused on a spectral-ratio approach that tried to determine whether a fire was actively flaming (indicating a surface fire) or not (assumed to indicate a sub-surface peat fire). This was unsuccessful: no effective discrimination could be achieved, most likely because mixed fire phases occupy a single pixel. That is, whilst near-surface and deep peat fires are likely dominated by smouldering, surface fires also likely have a significant smouldering component, so the two are difficult to separate.

After further analysis, the need for such a discrimination has been brought into question. If we assume that all fires begin on the surface, then any new fire must be associated with surface burning. One complication is that peat fires can spread underground, so a thermal anomaly appearing at a given location may originate from a sub-surface peat fire. This is only a problem if the sub-surface fire moves between pixels, so the question becomes: how likely is a sub-surface peat fire to cross a pixel boundary? The probability is low, as peat fire spread rates are very small compared to the geostationary imager pixel size (< 10 cm/h vs. 2 km). We can model this: for a 2 km pixel, assuming a 1 m/day spread rate and a 90 day burn period, there is roughly a 9% chance of the fire crossing into an adjacent pixel.
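
This back-of-envelope figure can be reproduced with the same geometric model used further down (In [36]); a minimal sketch, assuming fire start locations are uniform over the pixel and taking the 'safe' inner region (starts that never reach an edge) as a square of side (2 - spread) km:

In [ ]:
# reproduce the ~9% pixel-crossing figure quoted above
spread_rate_m_per_day = 1.0
burn_period_days = 90
pixel_size_km = 2.0

spread_km = spread_rate_m_per_day * burn_period_days / 1000.  # 0.09 km
safe_side_km = pixel_size_km - spread_km  # starts here never reach an edge
prob_crossing = 1 - safe_side_km**2 / pixel_size_km**2
print('P(pixel crossing) = %.3f' % prob_crossing)  # ~0.088, i.e. ~9%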

We can therefore justify the assumption that nearly all fires start as surface fires.

The issue of repeat burns: when do we consider a repeat burn of a pixel to be a new fire? Does this even matter? How many fires burn over the same pixel? How long do peat fires typically last? We can assess this and then apply a threshold: if the gap between detections of a pixel is less than a certain number of days, assume it is not a new fire.
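
One way to implement such a threshold, as a sketch only (it reuses the fires dataframe loaded below, and the threshold value is a placeholder to be set from the lifetime distributions):

In [ ]:
# flag a detection as a new fire when the gap since the previous detection
# of the same pixel exceeds a threshold (threshold value is a placeholder)
gap_threshold_days = 30

df = fires_df.sort_values('obs_time')
gaps = df.groupby(['ABS_line', 'ABS_samp']).obs_time.diff()  # NaT for first detection
df['is_new_fire'] = gaps.isnull() | (gaps > pd.Timedelta(days=gap_threshold_days))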

Refs:

https://www.jstage.jst.go.jp/article/tropics/14/1/14_1_1/_pdf

So what to do next:

• Fire lifetime distributions, peat and non-peat


In [9]:
# read in the himawari lat and lon data
#geo_file = '/Users/danielfisher/Projects/kcl-fire-aot/data/Asia/processed/himawari/Himawari_lat_lon.img'
geo_file = '/Users/dnf/Projects/kcl-fire-aot/data/Asia/processed/himawari/Himawari_lat_lon.img'
geostationary_lats, geostationary_lons = geo_read(geo_file)

In [11]:
# load in the peat maps
#path_to_peat_maps = '/Users/danielfisher/Projects/kcl-fire-aot/data/Asia/external/peat_maps/'
path_to_peat_maps = '/Users/dnf/Projects/kcl-fire-aot/data/Asia/external/peat_maps/'
df_list = []
peat_maps_paths = glob.glob(path_to_peat_maps + '*/*.nc')
for peat_maps_path in peat_maps_paths:
    d = read_nc(peat_maps_path)
    df_list.append(pd.DataFrame.from_dict(d))
peat_df = pd.concat(df_list)

In [12]:
# path = '/Users/danielfisher/Projects/kcl-fire-aot/data/Asia/processed/himawari/updated/frp_df.p'
path = '/Users/dnf/Projects/kcl-fire-aot/data/Asia/processed/himawari/updated/frp_df.p'
fires_df = pd.read_pickle(path)

In [13]:
# spatial subset to ROI
mask = ((fires_df.LATITUDE > -7) & (fires_df.LATITUDE < 4) & 
        (fires_df.LONGITUDE > 94) & (fires_df.LONGITUDE < 119))
fires_df = fires_df[mask]

In [14]:
fires_df = fires_df.sort_values('obs_time', ascending=True)
fires_df.reset_index(drop=True, inplace=True)

In [15]:
id_array = np.zeros([5500, 5500])
time_array = np.zeros([5500, 5500])

fire_ids = np.zeros(fires_df.shape[0])  # one id per detection
current_max_fire_id = 1

for i, row in fires_df.iterrows():
    
    l = int(row.ABS_line)
    s = int(row.ABS_samp)
    
    nearby_ids = (id_array[l-1:l+2, s-1:s+2]).flatten()  # get the 3x3 array centred on the current sample
    
    # convert row obs time to seconds since the unix epoch
    row_obs_time = (row.obs_time - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's')
    
    # if no previous fires in the 3x3 region
    if np.max(nearby_ids) == 0:
        id_array[l,s] = current_max_fire_id  # set the id in the array
        time_array[l,s] = row_obs_time  # record the obs time
        fire_ids[i] = current_max_fire_id  # assign the id
        current_max_fire_id += 1  # update the id
        
    # if there are previous fires in the 3x3 region then find the 
    # temporally nearest, and use that to update the arrays
    else:
        nearby_times = (time_array[l-1:l+2, s-1:s+2]).flatten()
        nearest_time_index = np.argmin(np.abs(nearby_times - row_obs_time))
        nearest_id = nearby_ids[nearest_time_index]  # get the temporally closest id
        nearest_time = nearby_times[nearest_time_index]

        # update
        id_array[l,s] = nearest_id
        time_array[l,s] = nearest_time
        fire_ids[i] = nearest_id



In [16]:
# append fire ids to df and drop single detections
fires_df['fire_id'] = fire_ids
fires_df['sample_count'] = 1 
fires_df_samples = fires_df.groupby('fire_id', as_index=False).agg({'sample_count': np.sum})
fires_df.drop('sample_count', axis=1, inplace=True)

# keep only the more persistent fires: more than 48 detections, i.e. more than
# 8 hours of observations at the 10-minute full-disk imaging cadence
fires_df_samples = fires_df_samples[fires_df_samples.sample_count > 48]

# now merge 
subset_fires_df = pd.merge(fires_df, fires_df_samples, on='fire_id')
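
A quick way to check how many distinct fires survive the persistence filter:

In [ ]:
print('fires retained: %d of %d' % (subset_fires_df.fire_id.nunique(),
                                    fires_df.fire_id.nunique()))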

In [17]:
# now add the peat information to the subset fires
subset_fires_df = assign_peat(peat_df, subset_fires_df)

In [18]:
# compute the number of days since the first burn

df_with_burn_times = subset_fires_df.copy()
df_with_burn_times['min_time'] = df_with_burn_times.obs_time.copy()
df_with_burn_times = df_with_burn_times.groupby(['ABS_line', 'ABS_samp'],as_index=False).agg({'min_time': np.min})

# standardise the start time
df_with_burn_times.min_time = df_with_burn_times.min_time.dt.normalize() 

# merge first time burn onto sub df
df_with_burn_times = pd.merge(subset_fires_df, df_with_burn_times, on=['ABS_line', 'ABS_samp'])

# get days since first burn as a float
subset_fires_df['days_since_first_burn'] = (df_with_burn_times.obs_time.dt.normalize() - 
                                            df_with_burn_times.min_time) / np.timedelta64(24, 'h')

# add in local solar time (UTC + 1 hour per 15 degrees of longitude)
subset_fires_df['local_solar_time'] = subset_fires_df.obs_time + pd.to_timedelta(subset_fires_df.LONGITUDE / 15., 'h')

# get hour since local midnight
subset_fires_df['hours_since_local_midnight'] = ((subset_fires_df.local_solar_time - 
                                                  subset_fires_df.local_solar_time.dt.normalize()) / np.timedelta64(1, 'h')).astype(int)
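
As a minimal check of the solar-time offset (mean solar time, ignoring the equation of time), a site near the western edge of the ROI (~96.6 degrees E) should sit about 6 h 26 min ahead of UTC:

In [ ]:
print(pd.to_timedelta(96.6 / 15., 'h'))  # 0 days 06:26:24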

In [19]:
subset_fires_df.head()


Out[19]:
FIRE_CONFIDENCE ABS_line ABS_samp BT_MIR BT_TIR FRP_0 LATITUDE LONGITUDE obs_time obs_date fire_id sample_count is_peat days_since_first_burn local_solar_time hours_since_local_midnight
0 0.614031 2552.0 651.0 314.550 293.622 55.8281 3.77182 96.6114 2015-07-02 03:50:00 2015-07-02 1.0 73 False 0.0 2015-07-02 10:16:26.736000 10
1 0.640497 2552.0 650.0 312.329 292.752 41.5363 3.77209 96.5812 2015-07-02 03:50:00 2015-07-02 1.0 73 True 0.0 2015-07-02 10:16:19.488000 10
2 0.628033 2552.0 650.0 313.228 293.024 46.6538 3.77209 96.5812 2015-07-02 04:00:00 2015-07-02 1.0 73 True 0.0 2015-07-02 10:26:19.488000 10
3 0.704558 2552.0 651.0 315.995 293.649 65.0966 3.77182 96.6114 2015-07-02 04:00:00 2015-07-02 1.0 73 False 0.0 2015-07-02 10:26:26.736000 10
4 0.699888 2552.0 651.0 313.854 293.892 50.4289 3.77182 96.6114 2015-07-02 04:10:00 2015-07-02 1.0 73 False 0.0 2015-07-02 10:36:26.736000 10

In [21]:
subset_fires_df_test = subset_fires_df.copy()
subset_fires_df_test = subset_fires_df_test.groupby(['fire_id', 'is_peat'], as_index=False).agg({'days_since_first_burn': np.max})

In [36]:
# assume a 1 m/day spread rate, so the spread distance in km is just days / 1000
spread_km = subset_fires_df_test.days_since_first_burn.values / 1000.  # in km
safe_side_km = 2 - spread_km  # side of the inner 'safe' square, in km

# now compute the probability of the fire reaching the pixel edge. The fire
# can start anywhere in the pixel, but only if it starts within the spread
# distance of the pixel border will it progress into an adjacent pixel.
# Treating start locations as uniform over the 2 km pixel, and taking the
# 'safe' inner region (starts that never reach an edge) as a square of side
# (2 - spread) km, the crossing probability is the remaining area fraction.
prob_of_reaching_pixel_edge = 1 - (safe_side_km**2 / 2**2)

# then can plot the distribution of that happening
sns.distplot(prob_of_reaching_pixel_edge)
plt.show()



In [ ]:


In [ ]:


In [211]:
# diurnal comparison of MIR brightness temperature (offset by a 300 K baseline
# so the bars show the anomaly) for peat vs. non-peat fires
mask = subset_fires_df.is_peat
hours_peat = subset_fires_df[mask].local_solar_time.dt.hour.values
bts_peat = subset_fires_df[mask].BT_MIR.values
sns.barplot(hours_peat, bts_peat - 300, color='k')

hours_not_peat = subset_fires_df[~mask].local_solar_time.dt.hour.values
bts_not_peat = subset_fires_df[~mask].BT_MIR.values
sns.barplot(hours_not_peat, bts_not_peat - 300, color='r')


plt.show()



In [11]:
# build a full-disk array with each detected pixel set to its fire id
plt_array = np.zeros([5500, 5500])

for i_d in subset_fires_df.fire_id.unique():
    id_subset = subset_fires_df[subset_fires_df.fire_id == i_d]
    plt_array[id_subset.ABS_line.astype(int), id_subset.ABS_samp.astype(int)] = i_d

In [15]:
plt.close('all')
fig = plt.figure(figsize=(16, 12))

lat_subset = geostationary_lats[2500:3200, 600:1600]
lon_subset = geostationary_lons[2500:3200, 600:1600]
to_plot = plt_array[2500:3200, 600:1600]
to_plot = np.ma.masked_array(to_plot,  to_plot==0)

img_extent = (lon_subset[0,0], 
              lon_subset[-1,-1], 
              lat_subset[-1,-1], 
              lat_subset[0,0])

ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent(img_extent, crs=ccrs.PlateCarree())


# alternative: render the subset with imshow using the image extent
#ax.imshow(plt_array[2500:3200, 600:1600], origin='upper', extent=img_extent, transform=ccrs.PlateCarree())
ax.coastlines(resolution='50m', color='black', linewidth=1)

ax.pcolormesh(lon_subset, lat_subset, to_plot, cmap='tab20b')


plt.show()



In [1]:
plt.close('all')
anim = animate_timeseries(subset_fires_df)
#anim.save('animation.gif', writer='imagemagick', fps=60)
HTML(anim.to_html5_video())



In [123]:
plt.close('all')
df_with_burn_times = subset_fires_df.copy()
df_with_burn_times['min_time'] = df_with_burn_times.obs_time.copy()
df_with_burn_times = df_with_burn_times.groupby(['ABS_line', 'ABS_samp'],as_index=False).agg({'min_time': np.min})

# standardise the start time
df_with_burn_times.min_time = df_with_burn_times.min_time.dt.normalize() 

# merge first time burn onto sub df
df_with_burn_times = pd.merge(subset_fires_df, df_with_burn_times, on=['ABS_line', 'ABS_samp'])

# get time since first burn in days
df_with_burn_times['time_since_first_burn'] = (df_with_burn_times.obs_time -
                                               df_with_burn_times.min_time) / np.timedelta64(1, 'D')

# restrict to the first five days of burning
df_with_burn_times = df_with_burn_times[(df_with_burn_times.time_since_first_burn > 0) &
                                        (df_with_burn_times.time_since_first_burn < 5)]

sns.jointplot(x="time_since_first_burn", y="BT_MIR", data=df_with_burn_times, kind='hex')
plt.show()


Utils:


In [5]:
def assign_peat(peat_df, fire_df):
    
    peat_lat_lon = np.dstack([np.deg2rad(peat_df.lats.values), 
                              np.deg2rad(peat_df.lons.values)])[0]
    peat_balltree = BallTree(peat_lat_lon, metric='haversine')

    # get the fire lats and lons in radians for querying the ball tree
    fire_locations = np.array(list(zip(np.deg2rad(fire_df.LATITUDE.values),
                                       np.deg2rad(fire_df.LONGITUDE.values))))

    # find the nearest peat-map cell to each fire location
    distances, indexes = peat_balltree.query(fire_locations, k=1)

    # assign the peat flag of the nearest peat-map cell to each fire
    fire_df['is_peat'] = peat_df.is_peat.values[indexes.ravel()]
    return fire_df

In [6]:
# Animation function

plt.close('all')
my_cmap = plt.get_cmap('tab20b')

def animate_timeseries(df, interval=25):  
    
    vmin = df.fire_id.values.min()
    vmax = df.fire_id.values.max()
    
    # set up the lats and the lons
    lat_subset = geostationary_lats[2500:3200, 600:1600]
    lon_subset = geostationary_lons[2500:3200, 600:1600]
     
    # generate the array to plot
    to_plot = np.zeros([5500, 5500])
    
    # list to hold data
    lines_list = [] 
    samples_list = []
    date_list = []
    id_list = []
    
    unique_times = df.obs_time.unique()
    unique_times.sort()
    for i, t in enumerate(unique_times):
        
        t_df = df[(df.obs_time == t)]
        lines_list.append(t_df.ABS_line.values.astype(int))
        samples_list.append(t_df.ABS_samp.values.astype(int))
        date_list.append(t)
        id_list.append(t_df.fire_id.values)
        
    # Create new Figure and an Axes which fills it.
    fig = plt.figure(figsize=(14, 10))
    ax = plt.axes(projection=ccrs.PlateCarree())
    ax.coastlines(resolution='50m', color='black', linewidth=1)

    # Build the array to plot
    to_plot[lines_list[0], samples_list[0]] = id_list[0]
    to_plot = np.ma.masked_array(to_plot,  to_plot==0)

    # plot it with pcolormesh and add in the date
    z = to_plot[2500:3200, 600:1600]
    z = z[:-1, :-1]
    quad = ax.pcolormesh(lon_subset, lat_subset, z, vmin=vmin, vmax=vmax, cmap=my_cmap)
    date_text = ax.text(0.05, 0.05, '', transform=ax.transAxes)

    def update(i):
         
        # Update the quadmesh
        to_plot[lines_list[i], samples_list[i]] = id_list[i]
        z = to_plot[2500:3200, 600:1600]
        z = z[:-1, :-1]
        rav_z = z.ravel()
        quad.set_array(rav_z)
        new_color = my_cmap(rav_z)
        quad.update({'facecolors':new_color})
        date_text.set_text("Date = {}".format(date_list[i]))

    # construct the animation, using the update function as the director
    anim = animation.FuncAnimation(fig, update, frames=len(lines_list), interval=interval)
    return anim

In [7]:
def geo_read(f, verbose=False):
    """
    read in the static data like view angle, landcover
    put them in a data dictionary
    """
    dim = 5500  # hard coded for Himawari8 possible it is 5500 in which case we need to zoom
    if verbose:
        print 'reading file %s' % f
    dtype = np.float32
    shape = (2, dim, dim)
    data = np.fromfile(f, dtype=dtype).reshape(shape)
    lat = data[0, :, :].astype(dtype)
    lon = data[1, :, :].astype(dtype)
    return lat, lon

In [8]:
def read_nc(f):
    nc_file = Dataset(f)

    # build the geographic grid from the stored range and spacing metadata
    lon_dims = nc_file['x_range'][:]
    lat_dims = nc_file['y_range'][:]
    spacing = nc_file['spacing'][:]
    lons = np.arange(lon_dims[0], lon_dims[1], spacing[0])
    lats = np.flipud(np.arange(lat_dims[0], lat_dims[1], spacing[1]))
    lons, lats = np.meshgrid(lons, lats)

    # get the peat mask (grid size hard-coded to the 3000 x 3000 peat maps)
    z = nc_file['z'][:].reshape([3000, 3000])
    mask = z > 0
    return {"is_peat": mask.ravel(), "lats": lats.ravel(), "lons": lons.ravel()}

In [ ]:
def build_frp_df(path):
    '''
    Build a single FRP dataframe from the per-scene csv files and pickle it.

    :param path: path to the directory holding the frp csv files
    :return: None (the dataframe is pickled to path + 'frp_df.p')
    '''
    frp_csv_files = glob.glob(path + '*.csv')
    df_from_each_file = (pd.read_csv(f) for f in frp_csv_files)
    frp_df = pd.concat(df_from_each_file, ignore_index=True)

    # keep only columns of interest
    frp_df = frp_df[['FIRE_CONFIDENCE', 'ABS_line', 'ABS_samp', 'BT_MIR', 'BT_TIR',
                     'FRP_0', 'LATITUDE', 'LONGITUDE', 'year', 'month', 'day', 'time']]

    # reindex onto date
    for k in ['year', 'month', 'day', 'time']:
        frp_df[k] = frp_df[k].astype(int).astype(str)
        if k == 'time':
            frp_df[k] = frp_df[k].str.zfill(4)
        if k in ['month', 'day']:
            frp_df[k] = frp_df[k].str.zfill(2)

    fmt = '%Y%m%d%H%M'
    frp_df['obs_time'] = pd.to_datetime(frp_df['year'] +
                                        frp_df['month'] +
                                        frp_df['day'] +
                                        frp_df['time'], format=fmt)
    frp_df['obs_date'] = frp_df['obs_time'].dt.date

    # drop columns we don't need
    #frp_df.drop(['LONGITUDE', 'LATITUDE'], axis=1, inplace=True)
    frp_df.drop(['year', 'month', 'day', 'time'], axis=1, inplace=True)

    frp_df.to_pickle(path + 'frp_df.p')
    
build_frp_df('/Users/danielfisher/Projects/kcl-fire-aot/data/Asia/processed/himawari/updated/')

In [ ]: